package com.esd.download;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

import javax.annotation.Resource;

import org.apache.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.esd.common.CatDao;
import com.esd.common.MongoDBUtil;
import com.esd.config.PageConfig;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.WebWindow;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

/**
 * Downloads a page through a shared HtmlUnit {@link WebClient} and hands it
 * back as a Jsoup {@link Document}.
 *
 * <p>The {@code WebClient} is held in a static field and is created by the
 * first constructor call; all instances share it.
 */
public class HtmlunitDownLoadHtml {
	private static final Logger log = Logger.getLogger(HtmlunitDownLoadHtml.class);
	private static WebClient webClient = null;

	/**
	 * Manual smoke test: prints the redirect stub document generated for a
	 * sample URL.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String url = "http://www.baidu.com";
		Document document = buildRedirectDocument(url);

		System.out.println(document.html());
	}

	/**
	 * Closes the shared {@link WebClient} if one has been created.
	 */
	public static void close() {
		if (webClient != null) {
			webClient.close();
		}
	}

	/**
	 * Creates the shared {@link WebClient} (emulating Internet Explorer 11) on
	 * first use and attaches a {@code SimpleConectionListener} to it.
	 */
	public HtmlunitDownLoadHtml() {
		if (webClient == null) {
			webClient = new WebClient(BrowserVersion.INTERNET_EXPLORER_11);
			// NOTE(review): presumably the listener registers itself on the
			// client inside its constructor - confirm in SimpleConectionListener.
			new SimpleConectionListener(webClient);
		}
	}

	/**
	 * Loads the URL from {@code pageConfig} in a window named "page" and
	 * converts the result to a Jsoup document.
	 *
	 * <p>If the response is an HTML page, the method optionally sleeps for
	 * {@code pageConfig.getSleep()} milliseconds and then parses the page's XML
	 * serialisation. Otherwise it returns an empty document whose body holds a
	 * single script that redirects to the URL.
	 *
	 * @param pageConfig supplies the URL, the JavaScript/CSS switches and the
	 *                   optional sleep time
	 * @param dao currently unused (only referenced by link-rewriting logic that
	 *            has been disabled)
	 * @param domain currently unused, see {@code dao}
	 * @param mongoDBUtil currently unused, see {@code dao}
	 * @return the parsed document, or {@code null} if the download was
	 *         interrupted or failed with an HTTP, URL or I/O error
	 */
	public Document download(PageConfig pageConfig,CatDao dao,String[] domain,MongoDBUtil mongoDBUtil) {

		try {
			// Mirror the per-page switches onto the shared client.
			webClient.getOptions().setJavaScriptEnabled(pageConfig.isJavaScriptEnabled());
			webClient.getOptions().setCssEnabled(pageConfig.isCssEnabled());
			// Do not abort on script errors or on non-2xx status codes.
			webClient.getOptions().setThrowExceptionOnScriptError(false);
			webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
			webClient.getOptions().setTimeout(10000); // milliseconds

			// The configured URL may carry HTML-escaped ampersands.
			String url = pageConfig.getUrl().replace("&amp;", "&");
			WebWindow opener = webClient.getWebWindows().get(0);
			WebWindow pageWindow = webClient.openWindow(new URL(url), "page", opener);

			Page page = pageWindow.getEnclosedPage();
			WebResponse response = page.getWebResponse();
			log.debug(url+"==response==["+response.getContentType()+"]");

			Document document;
			if (page.isHtmlPage()) {
				log.debug(url+"===[我是页面htmlpage]");
				HtmlPage htmlPage = (HtmlPage) page;
				// Optional pause before the page is serialised.
				if (pageConfig.getSleep() > 0) {
					Thread.sleep(pageConfig.getSleep());
				}
				// The base URI is the URL exactly as configured (not unescaped).
				document = Jsoup.parse(htmlPage.asXml(), pageConfig.getUrl());
			} else {
				log.debug(url+"===[不是htmlpage]");
				document = buildRedirectDocument(url);
			}
			// In-page resolution of JavaScript-driven links (added 2017-10-19)
			// used to run here; it is disabled, which is why dao, domain and
			// mongoDBUtil are not used.
			return document;
		} catch (InterruptedException e) {
			// Restore the interrupt flag so callers can observe the interruption.
			Thread.currentThread().interrupt();
			log.error("Interrupted while downloading " + pageConfig.getUrl(), e);
		} catch (FailingHttpStatusCodeException e) {
			log.error("Failing HTTP status while downloading " + pageConfig.getUrl(), e);
		} catch (MalformedURLException e) {
			log.error("Malformed URL: " + pageConfig.getUrl(), e);
		} catch (IOException e) {
			log.error("I/O error while downloading " + pageConfig.getUrl(), e);
		} finally {
			// NOTE(review): the shared client is closed after every call but the
			// field is never reset, so the constructor will not create a new one.
			// Confirm that the HtmlUnit version in use allows a WebClient to be
			// reused after close().
			if (webClient != null) {
				webClient.close();
			}
		}
		return null;

	}

	/**
	 * Builds an otherwise empty document whose body contains one script that
	 * sets {@code window.location.href} to the given URL.
	 *
	 * @param url target of the redirect; inserted into the script verbatim
	 * @return the generated document
	 */
	private static Document buildRedirectDocument(String url) {
		Document document = Jsoup.parse("");
		Elements bodies = document.getElementsByTag("body");
		String script = "<script type=\"text/javascript\">window.location.href='" + url + "'</script>";
		bodies.get(0).append(script);
		return document;
	}

}
